{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Second attempt at learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import plotly.offline as plt\n",
    "import plotly.graph_objs as go\n",
    "plt.init_notebook_mode()\n",
    "import os\n",
    "import random\n",
    "import math\n",
    "from tqdm import tqdm\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from scipy.stats import spearmanr\n",
    "import pymc3 as pm\n",
    "import theano\n",
    "theano.config.compute_test_value = 'raise'\n",
    "%matplotlib inline\n",
    "from multiprocessing import Pool, cpu_count\n",
    "\n",
    "SELECTED_DATA_DIR = \"../selected-data/\"\n",
    "MOVIES_FILE = \"best_movie_ratings_features_engineered.csv\"\n",
    "USERS_FILE = \"users_ratings.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv(SELECTED_DATA_DIR + MOVIES_FILE, index_col=0)\n",
    "movies.rating = movies.rating/10\n",
    "movies.sample()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "users = pd.read_csv(SELECTED_DATA_DIR + USERS_FILE, index_col=0)\n",
    "users.rating = users.rating/10\n",
    "users.sample()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def compute_utility(user_features, movie_features, epoch, s):\n",
    "    \"\"\" Compute utility U based on user preferences and movie preferences \"\"\"\n",
    "    res = user_features.dot(movie_features) * (1 - math.exp(-epoch/s))\n",
    "    return res\n",
    "\n",
    "def get_movie_features(movie):\n",
    "    \"\"\" selected features from dataframe \"\"\"\n",
    "    if isinstance(movie, pd.Series):\n",
    "        return movie[-50:]\n",
    "    elif isinstance(movie, pd.DataFrame):\n",
    "        return get_movie_features(movie.loc[movie.index[0]])\n",
    "    else:\n",
    "        raise TypeError(\"{} should be a Series or DataFrame\".format(movie))\n",
    "    \n",
    "def best_recommandation(user_features, movies, epoch, s):\n",
    "    \"\"\" Return the movie with the highest utility \"\"\"\n",
    "    utilities = np.zeros(movies.shape[0])\n",
    "    for i, (title, movie) in enumerate(movies.iterrows()):\n",
    "        movie_features = get_movie_features(movie)\n",
    "        utilities[i] = compute_utility(user_features, movie_features, epoch - movie.last_t, s)\n",
    "    return movies[movies.index == movies.index[utilities.argmax()]]\n",
    "\n",
    "def all_recommandation(user_features, movies):\n",
    "    \"\"\" Return all movies sorted by utility \"\"\"\n",
    "    movies = movies.copy()\n",
    "    movies['utilities'] = movies.apply(lambda mov: compute_utility(user_features, get_movie_features(mov), 1000), axis=1)\n",
    "    return movies.sort_values(by=\"utilities\")\n",
    "\n",
    "def random_choice(user_features, movies, epoch, s):\n",
    "    \"\"\" random approach to the problem, always exploring\"\"\"\n",
    "    return movies.sample()\n",
    "\n",
    "def greedy_choice(user_features, movies, epoch, s):\n",
    "    \"\"\" greedy approach to the problem \"\"\"\n",
    "    epsilon = 1 / math.sqrt(epoch+1)\n",
    "    if random.random() > epsilon: # choose the best\n",
    "        return best_recommandation(user_features, movies, epoch, s)\n",
    "    else:\n",
    "        return movies.sample()\n",
    "\n",
    "def greedy_choice_no_t(user_features, movies, epsilon=0.5):\n",
    "    \"\"\" greedy approach to the problem \"\"\"\n",
    "    if random.random() > epsilon: # choose the best\n",
    "        return best_recommandation(user_features, movies)\n",
    "    else:\n",
    "        return movies.sample()\n",
    "        \n",
    "def iterative_mean(old, new, t):\n",
    "    \"\"\" Compute the new mean \"\"\"\n",
    "    return ((t-1) / t) * old + (1/t) * new\n",
    "    \n",
    "def update_features(user_features, movie_features, rating, t):\n",
    "    \"\"\" update the user preferen \"\"\"\n",
    "    return iterative_mean(user_features, movie_features * rating, t+1)\n",
    "\n",
    "def reinforcement_learning(user, moviestc, choicef = greedy_choice, s=200, N=20):\n",
    "    user_features = np.zeros(moviestc.shape[1] - 2)\n",
    "    movies = moviestc.copy()\n",
    "    movies = movies[movies.columns.difference([\"votes\", \"rating\"])]\n",
    "    movies.insert(0, 'last_t', np.ones(movies.shape[0]).astype(np.int64))\n",
    "    movies.insert(0, 't', [i for i in range(movies.shape[0])])\n",
    "    movies.insert(0, 'rating', user.rating)\n",
    "    regret = 0\n",
    "    for t in range(N):\n",
    "        recommandation = choicef(user_features, movies, t+1, s)\n",
    "        recommandation_features = get_movie_features(recommandation)\n",
    "        user_rating = user.get_value(recommandation.index[0], \"rating\")\n",
    "        user_features = update_features(user_features, recommandation_features, user_rating, t)\n",
    "        utility = compute_utility(user_features, recommandation_features, t, s)\n",
    "        regret += user_rating - utility\n",
    "        movies.loc[movies.index.isin(recommandation.index),'last_t'] = t\n",
    "    return regret"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One user"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Selection one user randomly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "user = users[users.user.isin(users.user.sample())]\n",
    "user.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# get only movies that this user rated\n",
    "movies_user = movies[movies.index.isin(user.index)]\n",
    "movies_user.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Score : regret"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "regret_greedy = reinforcement_learning(user, movies_user, greedy_choice)\n",
    "regret_random = reinforcement_learning(user, movies_user, random_choice)\n",
    "print(regret_greedy, regret_random)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multiple users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def wrapper_rl_one_user(args):\n",
    "    return reinforcement_learning(*args)\n",
    "\n",
    "def rl_multiple_users(users, movies, algorithms=[greedy_choice, random_choice], s=200, N=20, N_USER=50):\n",
    "    regrets = []\n",
    "    users_sample = users[users.user.isin(pd.Series(users.user.unique()).sample(N_USER))].copy()\n",
    "    print(users_sample.shape)\n",
    "    movies_sample = movies[movies.index.isin(users_sample.index.unique())].copy()\n",
    "    for algo in algorithms:\n",
    "        regret_algo = []\n",
    "        args = []\n",
    "        for i, name in enumerate(users_sample.user.unique()):\n",
    "            user = users[users.user == name]\n",
    "            movies_user = movies_sample[movies_sample.index.isin(user.index)]\n",
    "            regret_algo.append(reinforcement_learning(user, movies_user, algo, s, N))\n",
    "        regrets.append(regret_algo)\n",
    "    regrets = [sum(regret)/len(regret) for regret in regrets]\n",
    "    return regrets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "for s in [0.05, 0.5, 1, 2, 3, 4, 5, 10, 100, 1000]:\n",
    "    regrets = rl_multiple_users(users, movies)\n",
    "    print(s , \" : \", *regrets)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Un s entre 1 et 2 semble optimal"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Try Bayesian inference"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### With scoring to evaluate performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "c0 = 10\n",
    "d0 = 3\n",
    "e0 = 0.01\n",
    "f0 = 0.001\n",
    "g0 = 0.001\n",
    "\n",
    "def bayes_inference(Dl, x):\n",
    "    N = x.size - 1\n",
    "    I = np.eye(N)\n",
    "    features = x[:N]\n",
    "    epoch = x[N]\n",
    "    with pm.Model():\n",
    "        sigma = pm.InverseGamma('sigma', f0, g0)\n",
    "        # distribution of user features content\n",
    "        theta = pm.MvNormal('theta', mu=0.5, cov=c0 * sigma * I, shape=(N,))\n",
    "        # distribution of user recovery rate\n",
    "        s = pm.Gamma('s', d0, e0)\n",
    "        \n",
    "        mu = theta.dot(features) * (1 - np.exp(-epoch/s))\n",
    "\n",
    "        rating = pm.Normal('rating', mu=mu, sd=sigma, observed=Dl)\n",
    "\n",
    "        step = pm.Metropolis()\n",
    "        trace = pm.sample(1000, step=step, progressbar=False)\n",
    "\n",
    "    rating_distribution = [compute_utility(trace.theta[i], features, epoch, trace.s[i]) for i in range(len(trace.theta))]\n",
    "    #plt.plot([go.Histogram(x=rating_distribution)])\n",
    "    return random.choice(rating_distribution)\n",
    "\n",
    "def UCB_choice(user, movies, epoch, s):\n",
    "    Dl = movies[movies.index.isin(movies.index[:epoch])]\n",
    "    ratings = np.zeros(movies.shape[0])\n",
    "    for i, (title, movie) in enumerate(movies.iterrows()):\n",
    "        ratings[i] = bayes_inference(Dl, movie)\n",
    "    return movies[movies.index == movies.index[ratings.argmax()]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "#%time UCB_choice(user, movies_user, 5, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "%time rl_multiple_users(users, movies, [random_choice, greedy_choice, UCB_choice], N=10, N_USER=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
